This notebook is an exercise for the Neural Networks for Computer Vision course at Afeka College of Engineering.
It involves a Multi-Label Object Detection task using Kaggle's Face Mask Detection dataset.
Table of Contents:
Submitted By:
import os
import sys
from datetime import datetime
import numpy as np
import pandas as pd
import cv2
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from sklearn import metrics
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from matplotlib import pyplot as plt
import seaborn as sns
from data_handler.FaceMaskData import FaceMaskData
from data_handler.FaceMaskDataset import FaceMaskDataset
from trainer import *
from metrics.metrics import *
from criterion.criterion import *
assert torch.cuda.is_available()
import json
class CFG:
seed = 42
model_name = 'faster_rcnn'
pretrained = True
img_width = 480
img_height = 480
batch_size = 10
n_epochs = 10
n_folds = 5
drop_rate = 0.
train_size = 0.90
nms_thresh = 0.2
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
num_workers = 4
num_classes = 4
imgs_path = 'images' # images dir
msks_path = 'annotations' # annotations dir
multilabelKFold = True
optimizer = optim.SGD
optimizer_dict = { 'lr': 0.005,
'momentum': 0.9,
'weight_decay': 0.0005 }
scheduler = optim.lr_scheduler.StepLR
scheduler_dict = { 'step_size': 5,
'gamma': 0.1 }
def save(path):
save_path = path + '/model_dict.json'
with open(save_path, 'w') as f:
for key, val in CFG.__dict__.items():
f.write('{}\t\t= {}\n'.format(key, val))
# optimizer = optim.Adam
# optimizer_dict = None
# scheduler = optim.lr_scheduler.StepLR
# scheduler_dict = { 'step_size': 5,
# 'gamma': 0.1 }
# optimizer = optim.ASGD
# optimizer_dict = None
# scheduler = optim.lr_scheduler.ExponentialLR
# scheduler_dict = { 'gamma': 0.9 }
np.random.seed(CFG.seed)
Load the data into a dataset
df = pd.read_csv(os.path.join(sys.path[0], 'annotation.csv'))
if CFG.multilabelKFold and CFG.n_folds > 1:
faceMasksData = FaceMaskData(CFG.imgs_path, CFG.msks_path, multilabelKFold=True, df_file=df)
(x_train, y_train, l_train), (x_test, y_test, l_test) = faceMasksData.load_data(
train_size=CFG.train_size,
drop_rate=CFG.drop_rate,
seed=CFG.seed)
else:
faceMasksData = FaceMaskData(CFG.imgs_path, CFG.msks_path)
(x_train, y_train), (x_test, y_test) = faceMasksData.load_data(
train_size=CFG.train_size,
drop_rate=CFG.drop_rate,
seed=CFG.seed)
print('Training contains {} samples which is {:g}% of the data'.format(len(x_train), len(x_train) * 100 / (len(x_train) + len(x_test))))
print('Testing contains {} samples which is {:g}% of the data'.format(len(x_test), len(x_test) * 100 / (len(x_train) + len(x_test))))
Training contains 767 samples which is 89.9179% of the data Testing contains 86 samples which is 10.0821% of the data
def collate_fn(batch):
    # detection targets contain a variable number of boxes per image, so the default
    # tensor-stacking collate cannot be used; return the images and targets as tuples instead
    return tuple(zip(*batch))
testset = FaceMaskDataset(x_test, y_test, CFG.imgs_path, CFG.msks_path, CFG.img_width, CFG.img_height, transforms=get_transformer('test'))
test_loader = DataLoader(dataset=testset, batch_size=CFG.batch_size, shuffle=False, num_workers=CFG.num_workers, collate_fn=collate_fn)
df.head()
|   | xmin | ymin | xmax | ymax | name | file | width | height | class | Xcent | Ycent | boxW | boxH |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 28 | 55 | 46 | 71 | with_mask | maksssksksss737 | 400 | 226 | 0 | 0.09250 | 0.278761 | 0.0450 | 0.070796 |
| 1 | 98 | 62 | 111 | 78 | with_mask | maksssksksss737 | 400 | 226 | 0 | 0.26125 | 0.309735 | 0.0325 | 0.070796 |
| 2 | 159 | 50 | 193 | 90 | mask_weared_incorrect | maksssksksss737 | 400 | 226 | 1 | 0.44000 | 0.309735 | 0.0850 | 0.176991 |
| 3 | 293 | 59 | 313 | 80 | with_mask | maksssksksss737 | 400 | 226 | 0 | 0.75750 | 0.307522 | 0.0500 | 0.092920 |
| 4 | 352 | 51 | 372 | 72 | with_mask | maksssksksss737 | 400 | 226 | 0 | 0.90500 | 0.272124 | 0.0500 | 0.092920 |
faces = len(df['file'].unique())
annotates = len(df)
print('There are total {} images in the data'.format(faces))
print('There are total {} annotated faces in the data'.format(annotates))
print('Average of {:.5f} per image'.format(annotates/faces))
There are total 853 images in the data There are total 4072 annotated faces in the data Average of 4.77374 per image
label_df = pd.DataFrame(columns=['label', 'count'])
for k, v in df['name'].value_counts().to_dict().items():
label_df = label_df.append({'label':k, 'count':v}, ignore_index=True)
display(label_df)
fig, ax = plt.subplots()
ax.bar(label_df['label'], label_df['count'], width=0.4)
plt.title('Label Count')
for index, data in enumerate(label_df['count']):
plt.text(x=index-0.1, y=data+1, s=data , fontdict=dict(fontsize=20))
plt.tight_layout()
plt.show()
|   | label | count |
|---|---|---|
| 0 | with_mask | 3232 |
| 1 | without_mask | 717 |
| 2 | mask_weared_incorrect | 123 |
def get_class_distribute(df):
    # encode the set of classes appearing in an image as a 3-bit mask
    # (bit 0: with_mask, bit 1: without_mask, bit 2: mask_weared_incorrect)
    # and count how many images fall into each of the 2^3 = 8 combinations
    classes_bit_shifter_amount = {
        "with_mask": 0,
        "without_mask": 1,
        "mask_weared_incorrect": 2,
    }
    dists = np.zeros(8)
    for name in faceMasksData.images:
        img_name = name.split('.')[0]
        img_classes = df[df['file'] == img_name]['name'].unique()
        index = 0
        for cls in img_classes:
            index |= 1 << classes_bit_shifter_amount[cls]
        dists[index] += 1
    if dists[0] == 0:  # no image without any annotation, so drop the 'None' bin
        return dists[1:]
    return dists
dist_names = ['None', 'with_mask', 'without_mask', 'with_mask\nwithout_mask',
'mask_weared_incorrect', 'with_mask\nmask_weared_incorrect',
'without_mask\nmask_weared_incorrect',
'with_mask\nwithout_mask\nmask_weared_incorrect']
dist_values = get_class_distribute(df)
if len(dist_values) == 7:
dist_names = dist_names[1:]
plt.figure(figsize=(15, 5))
axs = plt.bar(dist_names, dist_values)
for ax in axs:
plt.text(ax.get_xy()[0] + 0.3, ax.get_height(), int(ax.get_height()), fontsize='x-large')
plt.xticks(rotation=30, ha='right')
plt.title('Classes per Image')
plt.show()
We can see from these two charts that the data is very imbalanced.
For example, there are only 123 faces with an incorrectly worn mask; those faces appear in (21 + 30 + 4 + 42) = 97 images, which is (97 / 853) = 11% of all the images.
On the other hand, faces with masks appear in (768 / 853) = 90% of all the images.
Therefore, later in the training we will have to split the train and validation sets in a careful manner.
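The figures quoted above can be recomputed directly from the annotation DataFrame. A minimal sketch, relying only on the df and faces variables defined earlier:
# for each class, count the images that contain at least one face of that class
for cls in df['name'].unique():
    imgs_with_cls = df[df['name'] == cls]['file'].nunique()
    print('{}: appears in {} images ({:.1%} of all images)'.format(cls, imgs_with_cls, imgs_with_cls / faces))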
def get_num_faces(df, image_name_col):
    # count, for every possible number of faces, how many images contain exactly that many faces
    # (relies on the annotation rows of each image being contiguous in the CSV)
    max_faces = find_max_faces(df, image_name_col)
    arr = [0] * (max_faces + 1)
    faces_count = 1
    current_img = df.at[0, image_name_col]
    for i in range(1, len(df)):
        if current_img == df.at[i, image_name_col]:
            faces_count += 1
        else:
            arr[faces_count] += 1
            faces_count = 1
            current_img = df.at[i, image_name_col]
    arr[faces_count] += 1  # account for the last image's group
    dist_df = pd.DataFrame(columns=['faces', 'count'])
    for i, val in enumerate(arr):
        if val > 0:
            dist_df = dist_df.append({'faces': i, 'count': val}, ignore_index=True)
    return dist_df

def find_max_faces(df, image_name_col):
    # largest number of annotated faces appearing in a single image
    max_faces = 1
    faces_count = 1
    current_img = df.at[0, image_name_col]
    for i in range(1, len(df)):
        if current_img == df.at[i, image_name_col]:
            faces_count += 1
        else:
            max_faces = max(max_faces, faces_count)
            current_img = df.at[i, image_name_col]
            faces_count = 1
    return max(max_faces, faces_count)
face_dist_df = get_num_faces(df, 'file')
ax = face_dist_df.plot.bar(x='faces', y='count', figsize=(15,5), title='Faces per Image')
for idx, label in enumerate(list(face_dist_df.faces)):
val = int(face_dist_df[face_dist_df['faces']==label]['count'])
ax.annotate(val,
(idx-0.2, val),
xytext=(0, 15),
textcoords='offset points')
print('STD of faces in the images: {:.5}'.format(face_dist_df['count'].std()))
STD of faces in the images: 58.229
As seen above, the data is very imbalanced.
The number of faces per image is inconsistent, and the total number of appearances of each class differs greatly.
For better training, we would like to use Cross-Validation.
We encounter two main problems: first, our data is imbalanced, so regular splitting methods might cause some classes not to appear at all in some folds; second, our task involves Multi-label Object Detection, which the well-known splitting algorithms do not handle.
The first problem is addressed by Stratified KFold, which builds the folds while preserving the percentage of samples of each label.
The second problem cannot be handled by scikit-learn's implementation, therefore we use the MultilabelStratifiedKFold implementation from the iterative-stratification package (taken from here).
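As a quick illustration of the API, here is a toy sketch of MultilabelStratifiedKFold; the labels must be given as a binary indicator matrix with one column per class, and the data below is made up purely for illustration:
# toy example (hypothetical data): each row of toy_Y marks which classes appear in that sample
toy_X = np.arange(8).reshape(-1, 1)
toy_Y = np.array([[1, 0, 0], [1, 0, 0], [1, 1, 0], [0, 1, 0],
                  [1, 0, 1], [1, 0, 0], [1, 1, 0], [0, 1, 1]])
toy_mskf = MultilabelStratifiedKFold(n_splits=2, shuffle=True, random_state=CFG.seed)
for toy_train_idx, toy_valid_idx in toy_mskf.split(toy_X, toy_Y):
    print('train:', toy_train_idx, 'valid:', toy_valid_idx)
Each fold keeps the per-class label proportions as balanced as possible, which is exactly what we rely on below for the real data.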
# self.classes = [None, 'without_mask','with_mask','mask_weared_incorrect']
# MultilabelStratifiedKFold needs to get the Y as binary for each class
x_names = [ name.split('.')[0] for name in x_train ]
y_classes = []
for name in x_names:
classes = list(df[df['file']==name]['name'].unique())
indice = np.zeros(4, dtype=np.uint8)
for c in classes:
index = testset.classes.index(c)
indice[index] = 1
y_classes.append(list(indice))
print(y_classes[:5])
[[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0]]
df_folds = pd.DataFrame()
mskf = MultilabelStratifiedKFold(CFG.n_folds)
for fold_id, (train_idx, test_idx) in enumerate(mskf.split(x_names, y_classes), start=1):
classes = np.zeros(len(testset.classes))
for idx in train_idx:
name = x_names[idx]
for cl, val in df[df['file']==name]['name'].value_counts().to_dict().items():
class_index = testset.classes.index(cl)
classes[class_index] += val
for cl, val in df['name'].value_counts().to_dict().items():
class_index = testset.classes.index(cl)
df_folds = df_folds.append({ 'fold': int(fold_id),
'class':cl,
'count': classes[class_index] ,
}, ignore_index=True)
print('fold {}: train_size={}, test_size={}'.format(fold_id, len(train_idx), len(test_idx)))
# df_folds = df_folds.append({ 'fold': fold_id,
# 'train': len(train_idx),
# 'valid': len(test_idx),
# 'without_mask': classes[testset.classes.index('without_mask')],
# 'with_mask': classes[testset.classes.index('with_mask')],
# 'mask_weared_incorrect': classes[testset.classes.index('mask_weared_incorrect')]
# }, ignore_index=True)
display(df_folds)
g = sns.catplot(x='fold', y='count', hue='class', data=df_folds, kind='bar', legend=True)
g.fig.set_size_inches(15, 8)
g.fig.subplots_adjust(top=0.81, right=0.86)
# extract the matplotlib axes_subplot objects from the FacetGrid
ax = g.facet_axis(0, 0)
# iterate through the axes containers
for c in ax.containers:
labels = [int(v.get_height()) for v in c]
ax.bar_label(c, labels=labels, label_type='edge')
fold 1: train_size=616, test_size=151 fold 2: train_size=613, test_size=154 fold 3: train_size=615, test_size=152 fold 4: train_size=612, test_size=155 fold 5: train_size=612, test_size=155
|   | fold | class | count |
|---|---|---|---|
| 0 | 1.0 | with_mask | 2348.0 |
| 1 | 1.0 | without_mask | 567.0 |
| 2 | 1.0 | mask_weared_incorrect | 92.0 |
| 3 | 2.0 | with_mask | 2255.0 |
| 4 | 2.0 | without_mask | 510.0 |
| 5 | 2.0 | mask_weared_incorrect | 90.0 |
| 6 | 3.0 | with_mask | 2383.0 |
| 7 | 3.0 | without_mask | 483.0 |
| 8 | 3.0 | mask_weared_incorrect | 87.0 |
| 9 | 4.0 | with_mask | 2369.0 |
| 10 | 4.0 | without_mask | 551.0 |
| 11 | 4.0 | mask_weared_incorrect | 89.0 |
| 12 | 5.0 | with_mask | 2205.0 |
| 13 | 5.0 | without_mask | 557.0 |
| 14 | 5.0 | mask_weared_incorrect | 90.0 |
From the information above, it is possible to see that MultilabelStratifiedKFold splits the images into almost identical train/validation sizes in each fold, while preserving the number of labels from each class across the different folds.
def norm(img):
img = np.array(img, dtype=np.float32)
img -= img.min()
img /= img.max()
return img
def get_annotated_img(img, annt, is_pred=False):
img = norm(np.array(np.transpose(img, (1, 2, 0)))) * 255.0 # multiply by 255, since the dataset divides the pixel values by it
# needed because the image comes from the dataset with values in [0, 1],
# while the annotations are drawn in [0, 255]; plt expects images in [0, 1],
# so the image is normalized again before display
num_faces = len(annt['boxes'])
for i in range(num_faces):
box, cat = annt['boxes'][i], annt['labels'][i]
if is_pred:
score = annt['scores'][i]
xmin, ymin, xmax, ymax = np.array(box, dtype=np.int32)
start_point = (xmin, ymin)
end_point = (xmax, ymax)
# [None, 'without_mask','with_mask','mask_weared_incorrect']
color = (0, 0, 0)
if cat == 2:
color = (0, 255, 0) # green
elif cat == 3:
color = (0, 0, 255) # blue
elif cat == 1:
color = (255, 0, 0) # red
thickness = 1
img = cv2.rectangle(img, start_point, end_point, color, thickness)
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1 / 3
if not is_pred:
img = cv2.putText(img, '{}'.format(faceMasksData.classes[cat]), start_point, font, fontScale, color = (0, 0, 0), thickness=2)
img = cv2.putText(img, '{}'.format(faceMasksData.classes[cat]), start_point, font, fontScale, color, thickness)
else:
img = cv2.putText(img, '{:.2f}'.format(score), start_point, font, fontScale, color = (0, 0, 0), thickness=2)
img = cv2.putText(img, '{:.2f}'.format(score), start_point, font, fontScale, color, thickness)
return img
def show_augmented_samples(dataloader, model=None):
samples, annotations = next(iter(dataloader)) # get the first batch
amount_samples = min(12, len(samples)) # number of examples will be at most 12
for i, (img, ant) in enumerate(zip(samples, annotations)):
if i >= amount_samples:
break
plt.figure(figsize=(10, 10))
# plt.subplot(amount_samples, 1, i + 1)
img = get_annotated_img(img, ant)
plt.imshow(norm(img))
plt.axis('off')
plt.tight_layout()
plt.show()
show_augmented_samples(test_loader)
def get_model(num_classes, pretrained=True):
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pretrained) # get model
in_features = model.roi_heads.box_predictor.cls_score.in_features # get input size of last layer
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes) # regenerate the last layer
return model
model = get_model(num_classes=CFG.num_classes, pretrained=CFG.pretrained)
# model.roi_heads.fastrcnn_loss = custom_fastrcnn_loss
# torchvision.models.detection.roi_heads.fastrcnn_loss = custom_fastrcnn_loss
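Training below is delegated to the train helper imported from the project's trainer module, which is not shown in this notebook. As a rough, hypothetical sketch (not the actual implementation), one training step with a torchvision detection model typically looks like the following; this is also where the four loss terms reported in the logs below come from:
# hypothetical sketch of one training step (the real loop lives in the trainer module):
# in training mode a torchvision detection model returns a dict with
# loss_classifier, loss_box_reg, loss_objectness and loss_rpn_box_reg
def sketch_train_step(model, images, targets, optimizer, device=CFG.device):
    model.train()
    images = [img.to(device) for img in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    loss_dict = model(images, targets)   # forward pass returns the loss dict
    loss = sum(loss_dict.values())       # total loss is the plain sum of its components
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return {k: v.item() for k, v in loss_dict.items()}
# the optimizer and scheduler would be built from CFG, e.g.
# optimizer = CFG.optimizer(model.parameters(), **CFG.optimizer_dict)
# scheduler = CFG.scheduler(optimizer, **CFG.scheduler_dict)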
datetime_str = datetime.today().strftime("%d-%m-%y_%H:%M")
run_path = os.path.join(sys.path[0], 'runs', datetime_str)
if CFG.multilabelKFold and CFG.n_folds > 1:
train(model, x_train, (y_train, l_train), run_path, CFG)
else:
train(model, x_train, y_train, run_path, CFG)
This running path is: `/home/linuxgpu/anaconda3/envs/lab1/Face-Mask-Detection/runs/06-11-21_14:49` Fold 1 of 5 Epoch 1 of 10:
train: 0%| | 0/62 [00:00<?, ?it/s]/home/linuxgpu/anaconda3/envs/lab1/lib/python3.9/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at /opt/conda/conda-bld/pytorch_1634272204863/work/aten/src/ATen/native/TensorShape.cpp:2157.) return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined] train: 100%|██████████| 62/62 [01:34<00:00, 1.52s/it] valid: 100%|██████████| 16/16 [00:12<00:00, 1.33it/s]
train: loss_classifier=13.09552 loss_box_reg=14.30862 loss_objectness=6.47676 loss_rpn_box_reg=1.76916 total loss=35.65005 valid: loss_classifier=2.17426 loss_box_reg=3.29323 loss_objectness=0.38296 loss_rpn_box_reg=0.24210 total loss=6.09255
Model saved. Loss < PrevLoss (6.09255 < inf) Epoch 2 of 10:
train: 100%|██████████| 62/62 [01:31<00:00, 1.47s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.35it/s]
train: loss_classifier=8.27749 loss_box_reg=12.61442 loss_objectness=1.29895 loss_rpn_box_reg=0.94417 total loss=23.13504 valid: loss_classifier=1.77119 loss_box_reg=3.21242 loss_objectness=0.26894 loss_rpn_box_reg=0.21633 total loss=5.46888
Model saved. Loss < PrevLoss (5.46888 < 6.09255) Epoch 3 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.50s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.34it/s]
train: loss_classifier=6.57114 loss_box_reg=11.52484 loss_objectness=0.78454 loss_rpn_box_reg=0.76301 total loss=19.64354 valid: loss_classifier=1.49072 loss_box_reg=2.74159 loss_objectness=0.20460 loss_rpn_box_reg=0.17820 total loss=4.61512
Model saved. Loss < PrevLoss (4.61512 < 5.46888) Epoch 4 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.49s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.37it/s]
train: loss_classifier=5.55278 loss_box_reg=10.28885 loss_objectness=0.42510 loss_rpn_box_reg=0.64418 total loss=16.91091 valid: loss_classifier=1.48414 loss_box_reg=2.86749 loss_objectness=0.23033 loss_rpn_box_reg=0.18435 total loss=4.76630
Epoch 5 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.49s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.36it/s]
train: loss_classifier=4.76619 loss_box_reg=9.35557 loss_objectness=0.29327 loss_rpn_box_reg=0.54724 total loss=14.96227 valid: loss_classifier=1.50454 loss_box_reg=2.62833 loss_objectness=0.23349 loss_rpn_box_reg=0.18571 total loss=4.55206
Model saved. Loss < PrevLoss (4.55206 < 4.61512) Epoch 6 of 10:
train: 100%|██████████| 62/62 [01:35<00:00, 1.54s/it] valid: 100%|██████████| 16/16 [00:12<00:00, 1.24it/s]
train: loss_classifier=4.20206 loss_box_reg=7.91046 loss_objectness=0.22473 loss_rpn_box_reg=0.48617 total loss=12.82341 valid: loss_classifier=1.44803 loss_box_reg=2.50765 loss_objectness=0.26350 loss_rpn_box_reg=0.16856 total loss=4.38774
Model saved. Loss < PrevLoss (4.38774 < 4.55206) Epoch 7 of 10:
train: 100%|██████████| 62/62 [01:34<00:00, 1.52s/it] valid: 100%|██████████| 16/16 [00:12<00:00, 1.26it/s]
train: loss_classifier=4.06754 loss_box_reg=7.60325 loss_objectness=0.19964 loss_rpn_box_reg=0.54196 total loss=12.41239 valid: loss_classifier=1.42979 loss_box_reg=2.49548 loss_objectness=0.24213 loss_rpn_box_reg=0.17051 total loss=4.33791
Model saved. Loss < PrevLoss (4.33791 < 4.38774) Epoch 8 of 10:
train: 100%|██████████| 62/62 [01:34<00:00, 1.52s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.38it/s]
train: loss_classifier=3.88398 loss_box_reg=7.37365 loss_objectness=0.20655 loss_rpn_box_reg=0.44166 total loss=11.90584 valid: loss_classifier=1.44501 loss_box_reg=2.51034 loss_objectness=0.26272 loss_rpn_box_reg=0.16720 total loss=4.38528
Epoch 9 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.50s/it] valid: 100%|██████████| 16/16 [00:12<00:00, 1.33it/s]
train: loss_classifier=3.80510 loss_box_reg=7.29909 loss_objectness=0.19045 loss_rpn_box_reg=0.43648 total loss=11.73111 valid: loss_classifier=1.48162 loss_box_reg=2.51576 loss_objectness=0.27210 loss_rpn_box_reg=0.17002 total loss=4.43949
Epoch 10 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.50s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.39it/s]
train: loss_classifier=3.78596 loss_box_reg=7.15256 loss_objectness=0.18636 loss_rpn_box_reg=0.42816 total loss=11.55303 valid: loss_classifier=1.49275 loss_box_reg=2.52037 loss_objectness=0.29652 loss_rpn_box_reg=0.17070 total loss=4.48033
Fold 2 of 5 Epoch 1 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.50s/it] valid: 100%|██████████| 16/16 [00:12<00:00, 1.28it/s]
train: loss_classifier=4.50377 loss_box_reg=8.43735 loss_objectness=0.43612 loss_rpn_box_reg=0.53394 total loss=13.91118 valid: loss_classifier=1.12379 loss_box_reg=2.33065 loss_objectness=0.06920 loss_rpn_box_reg=0.23882 total loss=3.76246
Model saved. Loss < PrevLoss (3.76246 < 4.33791) Epoch 2 of 10:
train: 100%|██████████| 62/62 [01:45<00:00, 1.70s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.36it/s]
train: loss_classifier=3.92338 loss_box_reg=7.96089 loss_objectness=0.24959 loss_rpn_box_reg=0.47727 total loss=12.61114 valid: loss_classifier=1.06888 loss_box_reg=2.46613 loss_objectness=0.06959 loss_rpn_box_reg=0.18101 total loss=3.78561
Epoch 3 of 10:
train: 100%|██████████| 62/62 [01:31<00:00, 1.48s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.39it/s]
train: loss_classifier=3.49218 loss_box_reg=7.26537 loss_objectness=0.18878 loss_rpn_box_reg=0.40544 total loss=11.35176 valid: loss_classifier=1.10198 loss_box_reg=2.40741 loss_objectness=0.08056 loss_rpn_box_reg=0.16589 total loss=3.75584
Model saved. Loss < PrevLoss (3.75584 < 3.76246) Epoch 4 of 10:
train: 100%|██████████| 62/62 [01:30<00:00, 1.46s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.38it/s]
train: loss_classifier=3.10647 loss_box_reg=6.71067 loss_objectness=0.15426 loss_rpn_box_reg=0.37155 total loss=10.34295 valid: loss_classifier=1.13780 loss_box_reg=2.55113 loss_objectness=0.07742 loss_rpn_box_reg=0.18335 total loss=3.94970
Epoch 5 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.50s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.42it/s]
train: loss_classifier=2.84719 loss_box_reg=6.03317 loss_objectness=0.14476 loss_rpn_box_reg=0.34785 total loss=9.37297 valid: loss_classifier=1.09009 loss_box_reg=2.46081 loss_objectness=0.07584 loss_rpn_box_reg=0.16395 total loss=3.79069
Epoch 6 of 10:
train: 100%|██████████| 62/62 [01:34<00:00, 1.52s/it] valid: 100%|██████████| 16/16 [00:12<00:00, 1.33it/s]
train: loss_classifier=2.53557 loss_box_reg=5.06244 loss_objectness=0.12936 loss_rpn_box_reg=0.29237 total loss=8.01974 valid: loss_classifier=1.11588 loss_box_reg=2.22096 loss_objectness=0.09113 loss_rpn_box_reg=0.15908 total loss=3.58706
Model saved. Loss < PrevLoss (3.58706 < 3.75584) Epoch 7 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.50s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.38it/s]
train: loss_classifier=2.37269 loss_box_reg=4.63720 loss_objectness=0.11732 loss_rpn_box_reg=0.27962 total loss=7.40683 valid: loss_classifier=1.10763 loss_box_reg=2.21397 loss_objectness=0.08270 loss_rpn_box_reg=0.15723 total loss=3.56152
Model saved. Loss < PrevLoss (3.56152 < 3.58706) Epoch 8 of 10:
train: 100%|██████████| 62/62 [01:32<00:00, 1.49s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.46it/s]
train: loss_classifier=2.29749 loss_box_reg=4.44123 loss_objectness=0.09922 loss_rpn_box_reg=0.27263 total loss=7.11056 valid: loss_classifier=1.11939 loss_box_reg=2.22945 loss_objectness=0.08857 loss_rpn_box_reg=0.15411 total loss=3.59152
Epoch 9 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.37s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.46it/s]
train: loss_classifier=2.26610 loss_box_reg=4.36657 loss_objectness=0.09551 loss_rpn_box_reg=0.26428 total loss=6.99246 valid: loss_classifier=1.08741 loss_box_reg=2.24189 loss_objectness=0.09547 loss_rpn_box_reg=0.16087 total loss=3.58565
Epoch 10 of 10:
train: 100%|██████████| 62/62 [01:23<00:00, 1.34s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.54it/s]
train: loss_classifier=2.22361 loss_box_reg=4.25592 loss_objectness=0.10983 loss_rpn_box_reg=0.26375 total loss=6.85311 valid: loss_classifier=1.11129 loss_box_reg=2.24493 loss_objectness=0.09465 loss_rpn_box_reg=0.16230 total loss=3.61317
Fold 3 of 5 Epoch 1 of 10:
train: 100%|██████████| 62/62 [01:26<00:00, 1.40s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.47it/s]
train: loss_classifier=2.89687 loss_box_reg=6.20155 loss_objectness=0.14844 loss_rpn_box_reg=0.37760 total loss=9.62447 valid: loss_classifier=0.73486 loss_box_reg=1.75741 loss_objectness=0.04458 loss_rpn_box_reg=0.12150 total loss=2.65835
Model saved. Loss < PrevLoss (2.65835 < 3.56152) Epoch 2 of 10:
train: 100%|██████████| 62/62 [01:26<00:00, 1.40s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.50it/s]
train: loss_classifier=2.67159 loss_box_reg=6.19900 loss_objectness=0.11107 loss_rpn_box_reg=0.34921 total loss=9.33086 valid: loss_classifier=0.71454 loss_box_reg=1.57436 loss_objectness=0.03332 loss_rpn_box_reg=0.10861 total loss=2.43084
Model saved. Loss < PrevLoss (2.43084 < 2.65835) Epoch 3 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.36s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.47it/s]
train: loss_classifier=2.38039 loss_box_reg=5.55141 loss_objectness=0.10323 loss_rpn_box_reg=0.30867 total loss=8.34369 valid: loss_classifier=0.69966 loss_box_reg=1.53685 loss_objectness=0.03933 loss_rpn_box_reg=0.12012 total loss=2.39597
Model saved. Loss < PrevLoss (2.39597 < 2.43084) Epoch 4 of 10:
train: 100%|██████████| 62/62 [01:28<00:00, 1.43s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.54it/s]
train: loss_classifier=2.21643 loss_box_reg=5.08136 loss_objectness=0.09878 loss_rpn_box_reg=0.28616 total loss=7.68273 valid: loss_classifier=0.73519 loss_box_reg=1.69871 loss_objectness=0.03205 loss_rpn_box_reg=0.11449 total loss=2.58044
Epoch 5 of 10:
train: 100%|██████████| 62/62 [01:28<00:00, 1.42s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.43it/s]
train: loss_classifier=2.07299 loss_box_reg=4.98000 loss_objectness=0.09581 loss_rpn_box_reg=0.30230 total loss=7.45110 valid: loss_classifier=0.72116 loss_box_reg=1.58571 loss_objectness=0.05043 loss_rpn_box_reg=0.11075 total loss=2.46805
Epoch 6 of 10:
train: 100%|██████████| 62/62 [01:26<00:00, 1.39s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.42it/s]
train: loss_classifier=1.87551 loss_box_reg=3.80362 loss_objectness=0.09192 loss_rpn_box_reg=0.24215 total loss=6.01320 valid: loss_classifier=0.66901 loss_box_reg=1.47746 loss_objectness=0.03594 loss_rpn_box_reg=0.10499 total loss=2.28740
Model saved. Loss < PrevLoss (2.28740 < 2.39597) Epoch 7 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.37s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.51it/s]
train: loss_classifier=1.79984 loss_box_reg=3.43565 loss_objectness=0.07269 loss_rpn_box_reg=0.22345 total loss=5.53162 valid: loss_classifier=0.67725 loss_box_reg=1.46490 loss_objectness=0.03635 loss_rpn_box_reg=0.10406 total loss=2.28256
Model saved. Loss < PrevLoss (2.28256 < 2.28740) Epoch 8 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.36s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.53it/s]
train: loss_classifier=1.72747 loss_box_reg=3.27402 loss_objectness=0.06579 loss_rpn_box_reg=0.21480 total loss=5.28209 valid: loss_classifier=0.66178 loss_box_reg=1.45591 loss_objectness=0.04385 loss_rpn_box_reg=0.10443 total loss=2.26597
Model saved. Loss < PrevLoss (2.26597 < 2.28256) Epoch 9 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.36s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.52it/s]
train: loss_classifier=1.72748 loss_box_reg=3.14646 loss_objectness=0.06355 loss_rpn_box_reg=0.21115 total loss=5.14864 valid: loss_classifier=0.66725 loss_box_reg=1.46970 loss_objectness=0.04367 loss_rpn_box_reg=0.10453 total loss=2.28515
Epoch 10 of 10:
train: 100%|██████████| 62/62 [01:26<00:00, 1.40s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.44it/s]
train: loss_classifier=1.67097 loss_box_reg=3.06885 loss_objectness=0.06493 loss_rpn_box_reg=0.21177 total loss=5.01652 valid: loss_classifier=0.67274 loss_box_reg=1.45594 loss_objectness=0.04133 loss_rpn_box_reg=0.10411 total loss=2.27412
Fold 4 of 5 Epoch 1 of 10:
train: 100%|██████████| 62/62 [01:26<00:00, 1.39s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.50it/s]
train: loss_classifier=2.11340 loss_box_reg=4.55142 loss_objectness=0.10525 loss_rpn_box_reg=0.31822 total loss=7.08829 valid: loss_classifier=0.44227 loss_box_reg=1.26466 loss_objectness=0.01433 loss_rpn_box_reg=0.06309 total loss=1.78435
Model saved. Loss < PrevLoss (1.78435 < 2.26597) Epoch 2 of 10:
train: 100%|██████████| 62/62 [01:22<00:00, 1.33s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.48it/s]
train: loss_classifier=2.06942 loss_box_reg=4.76972 loss_objectness=0.11348 loss_rpn_box_reg=0.34587 total loss=7.29849 valid: loss_classifier=0.47479 loss_box_reg=1.21498 loss_objectness=0.01347 loss_rpn_box_reg=0.06223 total loss=1.76546
Model saved. Loss < PrevLoss (1.76546 < 1.78435) Epoch 3 of 10:
train: 100%|██████████| 62/62 [01:22<00:00, 1.34s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.48it/s]
train: loss_classifier=1.95938 loss_box_reg=4.68650 loss_objectness=0.10593 loss_rpn_box_reg=0.31972 total loss=7.07153 valid: loss_classifier=0.47734 loss_box_reg=1.18650 loss_objectness=0.01642 loss_rpn_box_reg=0.05824 total loss=1.73849
Model saved. Loss < PrevLoss (1.73849 < 1.76546) Epoch 4 of 10:
train: 100%|██████████| 62/62 [01:23<00:00, 1.35s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.50it/s]
train: loss_classifier=1.87435 loss_box_reg=4.32846 loss_objectness=0.08276 loss_rpn_box_reg=0.28809 total loss=6.57367 valid: loss_classifier=0.45570 loss_box_reg=1.15685 loss_objectness=0.01375 loss_rpn_box_reg=0.05259 total loss=1.67889
Model saved. Loss < PrevLoss (1.67889 < 1.73849) Epoch 5 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.36s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.44it/s]
train: loss_classifier=1.81630 loss_box_reg=4.36573 loss_objectness=0.07903 loss_rpn_box_reg=0.27420 total loss=6.53526 valid: loss_classifier=0.45354 loss_box_reg=1.10040 loss_objectness=0.01891 loss_rpn_box_reg=0.05480 total loss=1.62765
Model saved. Loss < PrevLoss (1.62765 < 1.67889) Epoch 6 of 10:
train: 100%|██████████| 62/62 [01:23<00:00, 1.35s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.49it/s]
train: loss_classifier=1.58024 loss_box_reg=3.22727 loss_objectness=0.07715 loss_rpn_box_reg=0.22654 total loss=5.11120 valid: loss_classifier=0.40923 loss_box_reg=0.86455 loss_objectness=0.01754 loss_rpn_box_reg=0.04710 total loss=1.33841
Model saved. Loss < PrevLoss (1.33841 < 1.62765) Epoch 7 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.37s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.41it/s]
train: loss_classifier=1.53529 loss_box_reg=2.84256 loss_objectness=0.06441 loss_rpn_box_reg=0.21638 total loss=4.65864 valid: loss_classifier=0.41812 loss_box_reg=0.84386 loss_objectness=0.01440 loss_rpn_box_reg=0.04727 total loss=1.32364
Model saved. Loss < PrevLoss (1.32364 < 1.33841) Epoch 8 of 10:
train: 100%|██████████| 62/62 [01:25<00:00, 1.38s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.45it/s]
train: loss_classifier=1.51137 loss_box_reg=2.69805 loss_objectness=0.06309 loss_rpn_box_reg=0.20215 total loss=4.47466 valid: loss_classifier=0.40416 loss_box_reg=0.83252 loss_objectness=0.01011 loss_rpn_box_reg=0.04599 total loss=1.29277
Model saved. Loss < PrevLoss (1.29277 < 1.32364) Epoch 9 of 10:
train: 100%|██████████| 62/62 [01:23<00:00, 1.35s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.47it/s]
train: loss_classifier=1.47942 loss_box_reg=2.56491 loss_objectness=0.06090 loss_rpn_box_reg=0.19740 total loss=4.30264 valid: loss_classifier=0.39874 loss_box_reg=0.83226 loss_objectness=0.01360 loss_rpn_box_reg=0.04566 total loss=1.29026
Model saved. Loss < PrevLoss (1.29026 < 1.29277) Epoch 10 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.37s/it] valid: 100%|██████████| 16/16 [00:11<00:00, 1.37it/s]
train: loss_classifier=1.42254 loss_box_reg=2.45920 loss_objectness=0.06247 loss_rpn_box_reg=0.19528 total loss=4.13949 valid: loss_classifier=0.41553 loss_box_reg=0.83686 loss_objectness=0.01218 loss_rpn_box_reg=0.04533 total loss=1.30990
Fold 5 of 5 Epoch 1 of 10:
train: 100%|██████████| 62/62 [01:25<00:00, 1.38s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.52it/s]
train: loss_classifier=1.58808 loss_box_reg=3.48276 loss_objectness=0.07036 loss_rpn_box_reg=0.23994 total loss=5.38115 valid: loss_classifier=0.32115 loss_box_reg=0.66512 loss_objectness=0.00567 loss_rpn_box_reg=0.02800 total loss=1.01993
Model saved. Loss < PrevLoss (1.01993 < 1.29026) Epoch 2 of 10:
train: 100%|██████████| 62/62 [01:22<00:00, 1.32s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.52it/s]
train: loss_classifier=1.66466 loss_box_reg=3.95294 loss_objectness=0.06512 loss_rpn_box_reg=0.25663 total loss=5.93934 valid: loss_classifier=0.32579 loss_box_reg=0.71039 loss_objectness=0.01037 loss_rpn_box_reg=0.02809 total loss=1.07463
Epoch 3 of 10:
train: 100%|██████████| 62/62 [01:22<00:00, 1.33s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.53it/s]
train: loss_classifier=1.71953 loss_box_reg=4.17969 loss_objectness=0.07333 loss_rpn_box_reg=0.26904 total loss=6.24158 valid: loss_classifier=0.37932 loss_box_reg=1.06688 loss_objectness=0.00584 loss_rpn_box_reg=0.02922 total loss=1.48125
Epoch 4 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.36s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.49it/s]
train: loss_classifier=1.71015 loss_box_reg=4.00098 loss_objectness=0.07557 loss_rpn_box_reg=0.26906 total loss=6.05577 valid: loss_classifier=0.36370 loss_box_reg=0.83227 loss_objectness=0.01014 loss_rpn_box_reg=0.02840 total loss=1.23451
Epoch 5 of 10:
train: 100%|██████████| 62/62 [01:24<00:00, 1.37s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.51it/s]
train: loss_classifier=1.56490 loss_box_reg=3.53557 loss_objectness=0.06030 loss_rpn_box_reg=0.26154 total loss=5.42231 valid: loss_classifier=0.35821 loss_box_reg=0.83052 loss_objectness=0.00645 loss_rpn_box_reg=0.03323 total loss=1.22841
Epoch 6 of 10:
train: 100%|██████████| 62/62 [01:22<00:00, 1.34s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.51it/s]
train: loss_classifier=1.41556 loss_box_reg=2.71695 loss_objectness=0.06185 loss_rpn_box_reg=0.20954 total loss=4.40391 valid: loss_classifier=0.32813 loss_box_reg=0.57913 loss_objectness=0.00397 loss_rpn_box_reg=0.02540 total loss=0.93663
Model saved. Loss < PrevLoss (0.93663 < 1.01993) Epoch 7 of 10:
train: 100%|██████████| 62/62 [01:22<00:00, 1.33s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.52it/s]
train: loss_classifier=1.34533 loss_box_reg=2.32790 loss_objectness=0.05871 loss_rpn_box_reg=0.18833 total loss=3.92026 valid: loss_classifier=0.30259 loss_box_reg=0.56242 loss_objectness=0.00521 loss_rpn_box_reg=0.02500 total loss=0.89523
Model saved. Loss < PrevLoss (0.89523 < 0.93663) Epoch 8 of 10:
train: 100%|██████████| 62/62 [01:21<00:00, 1.32s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.49it/s]
train: loss_classifier=1.28455 loss_box_reg=2.16953 loss_objectness=0.05653 loss_rpn_box_reg=0.17990 total loss=3.69051 valid: loss_classifier=0.31037 loss_box_reg=0.56011 loss_objectness=0.00550 loss_rpn_box_reg=0.02514 total loss=0.90112
Epoch 9 of 10:
train: 100%|██████████| 62/62 [01:23<00:00, 1.35s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.47it/s]
train: loss_classifier=1.26543 loss_box_reg=2.04308 loss_objectness=0.06177 loss_rpn_box_reg=0.17549 total loss=3.54577 valid: loss_classifier=0.30355 loss_box_reg=0.56003 loss_objectness=0.00538 loss_rpn_box_reg=0.02471 total loss=0.89366
Model saved. Loss < PrevLoss (0.89366 < 0.89523) Epoch 10 of 10:
train: 100%|██████████| 62/62 [01:23<00:00, 1.34s/it] valid: 100%|██████████| 16/16 [00:10<00:00, 1.50it/s]
train: loss_classifier=1.24534 loss_box_reg=1.97624 loss_objectness=0.05237 loss_rpn_box_reg=0.17310 total loss=3.44704 valid: loss_classifier=0.29360 loss_box_reg=0.55926 loss_objectness=0.00614 loss_rpn_box_reg=0.02473 total loss=0.88372
Model saved. Loss < PrevLoss (0.88372 < 0.89366)
# run_path = os.path.join(sys.path[0], 'runs', '05-11-21_18:17')
# print(run_path)
# model_path = os.path.join(run_path, 'models', 'faster_rcnn_model.pth')
# model = get_model(4)
# model.load_state_dict(torch.load(model_path))
# map_writer = SummaryWriter('{}/logs/mAP'.format(run_path))
def plot_test_examples(model, loader, nms_threshold):
device = CFG.device
model = model.to(device).eval()
imgs, annts = next(iter(loader))
imgs = list(img.to(device) for img in imgs)
with torch.no_grad():  # gradients are not needed for visualization
    output = model(imgs)
amount_samples = min(12, len(imgs)) # number of examples will be at most 12
for i, (img, ant, pred) in enumerate(zip(imgs, annts, output)):
if i >= amount_samples:
break
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 60))
axes[0].set_title('Ground Truth')
axes[1].set_title('Prediction')
axes[0].axis('off')
axes[1].axis('off')
annot_img = get_annotated_img(img.cpu().detach().numpy(), ant)
axes[0].imshow(norm(annot_img))
pred_thrs = get_pred_by_threshold(pred, nms_threshold)
pred_cpu = {k: v.cpu().detach().numpy() for k, v in pred_thrs.items()}
annot_img = get_annotated_img(img.cpu().detach().numpy(), pred_cpu, is_pred=True)
axes[1].imshow(norm(annot_img))
fig.tight_layout()
plt.show()
del imgs, pred, pred_thrs, pred_cpu
torch.cuda.empty_cache()
plot_test_examples(model, test_loader, nms_threshold=0.2)
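get_pred_by_threshold is also imported from the project's trainer module; a possible sketch of such a filter, built on torchvision.ops.nms (this is an assumption about its behaviour, not the actual implementation):
# hypothetical sketch of a prediction filter similar to get_pred_by_threshold:
# keep only the boxes that survive non-maximum suppression at the given IoU threshold
def nms_filter_sketch(pred, nms_threshold):
    keep = torchvision.ops.nms(pred['boxes'], pred['scores'], nms_threshold)
    return {k: v[keep] for k, v in pred.items()}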
mAP@[0.5:0.05:0.95]
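For each class and IoU threshold, the average precision (AP) is the area under that class's precision-recall curve; mAP at a given threshold is the mean AP over the classes, and mAP@[0.5:0.05:0.95] additionally averages those values over the ten IoU thresholds. The real aggregation helpers (calc_mAP_from_auc_dict, calc_mAP_per_class) come from the metrics module and may differ in detail; a rough sketch of the idea:
# rough sketch: auc_dict maps IoU threshold -> {class: AP}
# mAP at a threshold is the mean AP over classes; mAP@[0.5:0.05:0.95] then averages over thresholds
def sketch_mAP(auc_dict):
    per_thresh = {iou: float(np.mean(list(aps.values()))) for iou, aps in auc_dict.items()}
    return per_thresh, float(np.mean(list(per_thresh.values())))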
def plot_precision_recall(df, iou_thresh, save_path=None):
auc_dict = dict()
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(25, 7))
fig.suptitle('IOU Threshold {:.3f}'.format(iou_thresh))  # figure-level title, so the per-class titles below do not override it
for idx, c in enumerate(df['real_class'].unique()):
prec = df[df['real_class']==c]['precision']
recall = df[df['real_class']==c]['recall']
auc = metrics.auc(x=recall, y=prec)
auc_dict[c] = auc
axes[idx].plot(recall, prec)
axes[idx].set_title('Class {} AP={:.5f}'.format(faceMasksData.classes[c], auc))
axes[idx].grid()
axes[idx].set_ylabel('Precision')
axes[idx].set_xlabel('Recall')
fig.tight_layout()
plt.show()
if save_path:
fig.savefig('{}/AP@{:.3}.png'.format(save_path, iou_thresh))
return auc_dict
def plot_mAP_graph(iou_dict, save_path=None, iou_start=0.5, iou_step=0.05, iou_end=0.95):
x, y = zip(*sorted(iou_dict.items()))
fig = plt.figure(figsize=(25, 7))
plt.title('mAP Vs. IoU Threshold')
plt.plot(x, y, marker='o')
plt.tight_layout()
plt.grid()
plt.show()
if save_path:  # only save when an output directory was given
    save_path = '{}/mAP@[{:.3}:{:.3}:{:.3}].png'.format(save_path, iou_start, iou_step, iou_end)
    fig.savefig(save_path)
def evaluate(model, loader, nms_thresh, iou_start=0.5, iou_step=0.05, iou_end=0.95, eps=1e-6):
metrics_path = '{}/metrics'.format(run_path)
df = get_iou_as_df(model, loader, nms_thresh)
auc_dict = dict()
for iou_trsh in np.arange(iou_start, iou_end + iou_step, iou_step):
df = calc_precision_recall(df, iou_trsh, metrics_path)
auc_dict[iou_trsh] = plot_precision_recall(df, iou_trsh, metrics_path)
iou_dict = calc_mAP_from_auc_dict(auc_dict)
classes_mAP = calc_mAP_per_class(auc_dict, iou_dict)
plot_mAP_graph(iou_dict, metrics_path, iou_start, iou_step, iou_end)
total_mAP = .0
for iou in iou_dict:
print('mAP@{:.3} =\t{:.5}'.format(iou, iou_dict[iou]))
print('\nmAP Per Class:')
for c, val in classes_mAP.items():
total_mAP += val
print('{}\t\tmAP@[0.5:0.05:0.95] =\t{:.5f}'.format(faceMasksData.classes[c], classes_mAP[c]))
total_mAP /= len(classes_mAP)
print('Total mAP@[0.5:0.05:0.95] =\t{:.5f}'.format(total_mAP))
evaluate(model, test_loader, CFG.nms_thresh)
evaluating IoU: 100%|██████████| 9/9 [00:07<00:00, 1.13it/s]
mAP@0.5 = 0.72425 mAP@0.55 = 0.72114 mAP@0.6 = 0.72064 mAP@0.65 = 0.71567 mAP@0.7 = 0.69772 mAP@0.75 = 0.63705 mAP@0.8 = 0.52426 mAP@0.85 = 0.36264 mAP@0.9 = 0.17647 mAP@0.95 = 0.023618 mAP Per Class: with_mask mAP@[0.5:0.05:0.95] = 0.75545 without_mask mAP@[0.5:0.05:0.95] = 0.67559 mask_weared_incorrect mAP@[0.5:0.05:0.95] = 0.16000 Total mAP@[0.5:0.05:0.95] = 0.53035